From 50f06b3efb638efb0abd95dc62dca05ae67882c2 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Fri, 7 Aug 2020 21:54:27 +0200
Subject: [PATCH] Fix out-of-bounds read with 'xmllint --htmlout'

Make sure that truncated UTF-8 sequences don't cause an out-of-bounds
array access.

Thanks to @SuhwanSong and the Agency for Defense Development (ADD) for
the report.

Fixes #178.
---
 xmllint.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/xmllint.c b/xmllint.c
index f6a8e463..c647486f 100644
--- a/xmllint.c
+++ b/xmllint.c
@@ -528,6 +528,12 @@ static void
 xmlHTMLEncodeSend(void) {
     char *result;
 
+    /*
+     * xmlEncodeEntitiesReentrant assumes valid UTF-8, but the buffer might
+     * end with a truncated UTF-8 sequence. This is a hack to at least avoid
+     * an out-of-bounds read.
+     */
+    memset(&buffer[sizeof(buffer)-4], 0, 4);
     result = (char *) xmlEncodeEntitiesReentrant(NULL, BAD_CAST buffer);
     if (result) {
 	xmlGenericError(xmlGenericErrorContext, "%s", result);
-- 
2.28.0.rc2

